This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

### INSTALLING PACKAGES ###
#if (!require("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")

#BiocManager::install(c("readr", "enrichplot", "DOSE", "clusterProfiler", "AnnotationHub", "ensembldb", "tidyverse", "AnnotationDbi", "org.At.tair.db", "pathview", "gageData", "ggupset", "GOCompare", "UpSetR"))

###Loading Library
suppressPackageStartupMessages(library("org.At.tair.db"))
#suppressPackageStartupMessages(library(DOSE)) #for human? Disease Ontology Semantic and Enrichment analysis
#suppressPackageStartupMessages(library(pathview)) #calls specific pathway
library(clusterProfiler) #A universal enrichment tool for interpreting omics data #prettydoc
library(GOSemSim)
#library(AnnotationHub)
#library(ensembldb)
library(readr)
#library(ggupset)
# Loading relevant libraries 
library(tidyverse) # includes ggplot2, for data visualisation. dplyr, for data manipulation.
#library(RColorBrewer) # for a colourful plot
#library(pheatmap)
#library(enrichplot) # for visualisations
##library(ggupset) # for visualisations
#library(AnnotationDbi)
#library(org.At.tair.db)
#class(org.At.tair.db)
#columns(org.At.tair.db)
library(VennDiagram)
library(ggvenn)
library(readr)

setwd("~/Desktop/Lab/Working_Projects/trapseq_2023")
# Clean environment
rm(list = ls(all.names = TRUE)) # will clear all objects including hidden objects
gc() # free up memory and report the memory usage
           used  (Mb) gc trigger  (Mb) limit (Mb)
Ncells  7217262 385.5   18477105 986.8         NA
Vcells 15153070 115.7   55691654 424.9      32768
       max used  (Mb)
Ncells 18477105 986.8
Vcells 55691654 424.9
options(max.print = .Machine$integer.max, scipen = 0, stringsAsFactors = F, dplyr.summarise.inform = F) # avoid truncated output in R console and scientific notation
df <- read.csv(paste0(file = "deg_analysis_DESeq_all.csv"), row.names = 1)
ALL_DEG <- data.frame(df, row.names = df$row )
geneID<- ALL_DEG$row
## Prepare deg results -----------------------------------------------
ALL_DEG <- ALL_DEG %>% mutate(diffexpressed = case_when(
  log2FoldChange > 1 & padj < 0.01 ~ 'UP',
  log2FoldChange < 1 & padj < 0.01 ~ 'DOWN',
  padj > 0.01 ~ 'NO'
))

# Remove non-significant genes
ALL_DEG_table <- ALL_DEG[ALL_DEG$diffexpressed != 'NO', ]

# Substitute names so they are annotated nicely in the heatmap later
unique(ALL_DEG_table$diffexpressed)
[1] "UP"   "DOWN" NA    
# Split the dataframe into a list of sub-dataframes: upregulated, downregulated genes
deg_results_list <- split(ALL_DEG_table, ALL_DEG_table$diffexpressed)

colnames(ALL_DEG_table)
[1] "row"            "baseMean"       "log2FoldChange"
[4] "lfcSE"          "stat"           "pvalue"        
[7] "padj"           "diffexpressed" 
###add information to table
ALL_DEG_table$symbol = mapIds(org.At.tair.db, 
                    keys= row.names(ALL_DEG_table), 
                    keytype= "TAIR" , 
                    column= "SYMBOL", 
                    multiVals= "first")
'select()' returned 1:many mapping between keys and
columns
ALL_DEG_table$entrez = mapIds(org.At.tair.db, 
                    keys=row.names(ALL_DEG_table), 
                    keytype="TAIR", 
                    column="ENTREZID", 
                    multiVals="first")
'select()' returned 1:1 mapping between keys and
columns
library(GOSemSim)
atGO <- godata('org.At.tair.db', ont="BP")
preparing gene to GO mapping data...
preparing IC data...
library(clusterProfiler)
eGO <- enrichGO(gene         = ALL_DEG_table$entrez,
                OrgDb         = org.At.tair.db,
                keyType       = 'ENTREZID',
                ont           = "BP",
                pAdjustMethod = "BH",
                pvalueCutoff  = 0.01,
                qvalueCutoff  = 0.01)
head(eGO, 10)
#goplot(eGO)
#barplot(eGO)
##dotplot(eGO, showCategory=10, color = "p.adjust") + ggtitle("Dotplot for ORA")

### Output results from GO analysis to a table
#cluster_summary <- data.frame(eGO_dwnIM)
#write.csv(cluster_summary, "eGO_dwnIMclusterProfiler_oe.csv")
               
gGO <- groupGO(gene     = ALL_DEG_table$entrez,
               OrgDb    = 'org.At.tair.db',
               ont      = "BP",
               level    = 3,
               readable = TRUE)

head(gGO)
library(readr)
IMdwn<- read_table("dwnLFC1_01_IM.csv", col_names = TRUE) 

###clean up table: remove columns, add column names, remove all genes with na values, and duplicate genes
IMdwn_tb <- IMdwn[ ,-c(4:5)] %>% 
  data.frame()  %>% 
  as_tibble() %>% 
  na.omit() %>%
  arrange(desc(UMVsIM_log2FoldChange))

###add information to table
IMdwn_tb$symbol = mapIds(org.At.tair.db, 
                    keys= IMdwn_tb$Row.names, 
                    keytype= "TAIR" , 
                    column= "SYMBOL", 
                    multiVals= "first")
IMdwn_tb$entrez = mapIds(org.At.tair.db, 
                    keys=IMdwn_tb$Row.names, 
                    keytype="TAIR", 
                    column="ENTREZID", 
                    multiVals="first")
dwnIM_genes<- IMdwn_tb$entrez
#GO enrichment refers specifically to gene ontology. GO provides the framework and set of concepts for describing the functions of gene products from all organisms.GO integrates information about gene product function in the context of three domains:
eGO1 <- enrichGO(gene         = dwnIM_genes,
                OrgDb         = org.At.tair.db,
                keyType       = 'ENTREZID',
                ont           = "BP",
                pAdjustMethod = "BH",
                qvalueCutoff  = 0.01)
eGO1 # 243 enriched terms found
#
# over-representation test
#
#...@organism    Arabidopsis thaliana 
#...@ontology    BP 
#...@keytype     ENTREZID 
#...@gene    chr [1:5851] "832220" "817676" "826407" "819048" "829037" "816530" "827038" "841692" "826602" "831543" "824534" ...
#...pvalues adjusted by 'BH' with cutoff <0.05 
#...243 enriched terms found
'data.frame':   243 obs. of  9 variables:
 $ ID         : chr  "GO:0015979" "GO:0019684" "GO:0009657" "GO:0006091" ...
 $ Description: chr  "photosynthesis" "photosynthesis, light reaction" "plastid organization" "generation of precursor metabolites and energy" ...
 $ GeneRatio  : chr  "176/5681" "119/5681" "179/5681" "204/5681" ...
 $ BgRatio    : chr  "257/25585" "167/25585" "323/25585" "410/25585" ...
 $ pvalue     : num  6.07e-57 1.13e-41 8.07e-39 5.42e-35 6.82e-27 ...
 $ p.adjust   : num  1.34e-53 1.24e-38 5.93e-36 2.99e-32 3.01e-24 ...
 $ qvalue     : num  1.04e-53 9.63e-39 4.60e-36 2.32e-32 2.34e-24 ...
 $ geneID     : chr  "816733/828722/841853/835328/825716/819353/840099/837764/829729/832410/839350/836262/843988/828096/833461/821673"| __truncated__ "816733/840099/829729/839350/833461/821673/828489/840297/819185/816078/817646/819992/823814/824242/844245/831799"| __truncated__ "841637/839418/817595/832183/817170/841248/837198/819740/828912/830913/829660/832298/835175/837797/832346/840099"| __truncated__ "835464/838889/815120/836704/821163/816561/827485/831150/816781/816733/829679/830419/814643/819247/817221/837214"| __truncated__ ...
 $ Count      : int  176 119 179 204 159 128 131 139 88 102 ...
#...Citation
 T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu.
 clusterProfiler 4.0: A universal enrichment tool for interpreting omics data.
 The Innovation. 2021, 2(3):100141 
#dotplot(eGO1, x = "GeneRatio", showCategory = 10,label_format = 30 , color = "p.adjust")+ ggtitle("Dotplot for eGO")
#goplot(eGO1 , color = "p.adjust")

### Output results from GO analysis to a table
#cluster_summary <- data.frame(eGO_dwnIM)
#write.csv(cluster_summary, "eGO_dwnIMclusterProfiler_oe.csv")


###Reduce redundancy
#Bc the nature of GO, it is organized as a directed acyclic graph, parent terms can show enrichment due to over rep child terms. 
# to fix use simplify() function 
s_eGO1<-clusterProfiler::simplify(eGO1)
s_eGO1 #89 terms
#
# over-representation test
#
#...@organism    Arabidopsis thaliana 
#...@ontology    BP 
#...@keytype     ENTREZID 
#...@gene    chr [1:5851] "832220" "817676" "826407" "819048" "829037" "816530" "827038" "841692" "826602" "831543" "824534" ...
#...pvalues adjusted by 'BH' with cutoff <0.05 
#...89 enriched terms found
'data.frame':   89 obs. of  9 variables:
 $ ID         : chr  "GO:0015979" "GO:0009657" "GO:0033013" "GO:0007017" ...
 $ Description: chr  "photosynthesis" "plastid organization" "tetrapyrrole metabolic process" "microtubule-based process" ...
 $ GeneRatio  : chr  "176/5681" "179/5681" "159/5681" "131/5681" ...
 $ BgRatio    : chr  "257/25585" "323/25585" "323/25585" "251/25585" ...
 $ pvalue     : num  6.07e-57 8.07e-39 6.82e-27 2.01e-25 4.63e-22 ...
 $ p.adjust   : num  1.34e-53 5.93e-36 3.01e-24 6.33e-23 1.13e-19 ...
 $ qvalue     : num  1.04e-53 4.60e-36 2.34e-24 4.92e-23 8.81e-20 ...
 $ geneID     : chr  "816733/828722/841853/835328/825716/819353/840099/837764/829729/832410/839350/836262/843988/828096/833461/821673"| __truncated__ "841637/839418/817595/832183/817170/841248/837198/819740/828912/830913/829660/832298/835175/837797/832346/840099"| __truncated__ "824336/843142/831378/837528/829322/822208/826957/819059/832107/831004/818494/824515/843330/841266/834682/843310"| __truncated__ "842782/820142/820782/831870/824198/826474/843928/820052/834213/839878/832393/832060/815304/839678/818733/839401"| __truncated__ ...
 $ Count      : int  176 179 159 131 88 102 120 148 87 89 ...
#...Citation
 T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu.
 clusterProfiler 4.0: A universal enrichment tool for interpreting omics data.
 The Innovation. 2021, 2(3):100141 
dotplot(s_eGO1,showCategory=20,font.size=10,label_format=70)+
scale_size_continuous(range=c(1, 7))+
theme_minimal() +
ggtitle("GO Enrichment of down im-regulated genes")
Scale for size is already present.
Adding another scale for size, which will replace the existing scale.

# color genes by log2 fold changes; create named vector
foldchanges <- IMdwn_tb$UMVsIM_log2FoldChange
names(foldchanges) <- IMdwn_tb$symbol
## by default cnetplot gives the top 5 significant terms
#if we want to focus on specific terms we can subset our results
s_ego1loli <- s_eGO1
#s_ego1loli@result<-s_eGO1[c(6,15,32),]
cnetplot(s_ego1loli, color.params = list(foldChange = foldchanges),
color_category="purple")
ggo1 <- groupGO(gene= dwnIM_genes,
               OrgDb= 'org.At.tair.db',
               ont= "BP",
               level = 3,
               keyType = "ENTREZID",
               readable= TRUE)
head(ggo1)
cnetplot(ggo1, showCategory=5,
                      categorySize="pvalue",
                      color.params = list(foldChange = IMdwn_tb$UMVsIM_log2FoldChange),
                      order=TRUE)
search_kegg_organism('ath', by='kegg_code')
search_kegg_organism('Arabidopsis thaliana', by='scientific_name')

# omit any NA values 
kegg_gene_list<-na.omit(dwnIM_genes)

# sort the list in decreasing order (required for clusterProfiler)
kegg_gene_list = sort(dwnIM_genes, decreasing = TRUE)

#ora with kegg data base
ora_analysis_kegg <- enrichKEGG(gene = kegg_gene_list,
                                universe =ALL_DEG_table,
                                organism = "ath",
                                keyType = "ncbi-geneid",
                                minGSSize = 10,
                                maxGSSize = 500,
                                pAdjustMethod = "BH",
                                qvalueCutoff = 0.01,
                                use_internal_data = FALSE) # force to query latest KEGG db
Reading KEGG annotation online: "https://rest.kegg.jp/link/ath/pathway"...
Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/ath"...
Reading KEGG annotation online: "https://rest.kegg.jp/conv/ncbi-geneid/ath"...
`universe` is not in character and will be ignored...
head(ora_analysis_kegg)

ora_analysis_kegg_modules <- enrichMKEGG(gene =kegg_gene_list,
                                         universe = ALL_DEG_table,
                                         organism = "ath",
                                         keyType = "ncbi-geneid",
                                         minGSSize = 10,           # minimal size of genes annotated by Ontology term for testing.
                                         maxGSSize = 500,          # maximal size of genes annotated for testing
                                         pAdjustMethod = "BH",
                                         qvalueCutoff = 0.01)
Reading KEGG annotation online: "https://rest.kegg.jp/link/ath/module"...
Reading KEGG annotation online: "https://rest.kegg.jp/list/module"...
Reading KEGG annotation online: "https://rest.kegg.jp/conv/ncbi-geneid/ath"...
`universe` is not in character and will be ignored...
ggplot(ora_analysis_kegg)

#browseKEGG(ora_analysis_kegg, 'ath00195')
library("pathview")
#ath04145 <- pathview(gene.data  = allOE_genes, pathway.id = "ath00195", species    = "ath")

# create a simple dotplot graph
dotplot(ora_analysis_kegg, 
    color = "qvalue", 
    showCategory = 5, 
    size = "Count")



ridgeplot(ora_analysis_kegg, showCategory = 30) + labs(title = "KEGG_Pathway Analysis", x = "enrichment distribution") + scale_fill_gradient(low = "#56B1F7", high = "#132B43")
Error in (function (classes, fdef, mtable)  : 
  unable to find an inherited method for function ‘ridgeplot’ for signature ‘"enrichResult"’
mlist<-list(s_eGO1,ora_analysis_kegg)
names(mlist)<-c("GO-enrich","KEGG-enrich")
mresult<-merge_result(mlist)
dotplot(mresult,showCategory=5)

NA
NA
IMup<- read_table("upLFC1_01_IM.csv",  col_names = TRUE)

###cleaning up table
IMup_tb <- IMup[ ,-c(4:5)] %>% 
  data.frame()  %>% 
  as_tibble() %>% 
  na.omit() %>%
  arrange(desc(UMVsIM_log2FoldChange))

###add information to table
IMup_tb$symbol = mapIds(org.At.tair.db, 
                    keys= IMup_tb$Row.names, 
                    keytype= "TAIR" , 
                    column= "SYMBOL", 
                    multiVals= "first")

IMup_tb$entrez = mapIds(org.At.tair.db, 
                    keys=IMup_tb$Row.names, 
                    keytype="TAIR", 
                    column="ENTREZID", 
                    multiVals="first")
upIM_genes<- IMup_tb$entrez
eGO2 <- enrichGO(gene         = upIM_genes,
                OrgDb         = org.At.tair.db,
                keyType       = 'ENTREZID',
                ont           = "BP",
                pAdjustMethod = "BH",
                qvalueCutoff  = 0.01)

#dotplot(eGO2, x = "GeneRatio", showCategory = 10,label_format = 30 , color = "p.adjust")+ ggtitle("Dotplot for eGO")
#goplot(eGO2, color = "p.adjust")
#barplot(eGO2, color = "p.adjust")

### Output results from GO analysis to a table
#upIM_cluster_summary <- data.frame(eGO2)
#write.csv(upIM_cluster_summary, "eGO2IMclusterProfiler_oe.csv")

###Reduce redundancy
s_eGO2<-clusterProfiler::simplify(eGO2)
s_eGO2 #89 terms
#
# over-representation test
#
#...@organism    Arabidopsis thaliana 
#...@ontology    BP 
#...@keytype     ENTREZID 
#...@gene    chr [1:2905] NA "819032" "840292" "832380" "816469" "837527" "825182" ...
#...pvalues adjusted by 'BH' with cutoff <0.05 
#...82 enriched terms found
'data.frame':   82 obs. of  9 variables:
 $ ID         : chr  "GO:0009751" "GO:0090693" "GO:0002237" "GO:0001666" ...
 $ Description: chr  "response to salicylic acid" "plant organ senescence" "response to molecule of bacterial origin" "response to hypoxia" ...
 $ GeneRatio  : chr  "239/2820" "193/2820" "101/2820" "150/2820" ...
 $ BgRatio    : chr  "427/25585" "372/25585" "153/25585" "342/25585" ...
 $ pvalue     : num  1.31e-116 3.67e-86 2.92e-59 1.07e-54 5.42e-42 ...
 $ p.adjust   : num  2.39e-113 3.35e-83 1.33e-56 3.92e-52 1.24e-39 ...
 $ qvalue     : num  2.09e-113 2.92e-83 1.16e-56 3.42e-52 1.08e-39 ...
 $ geneID     : chr  "826642/815864/833667/820608/821135/838719/842988/843660/841620/7922438/826708/838122/828272/820307/837912/82258"| __truncated__ "818143/826642/818959/815864/838719/821706/818171/828272/843192/820307/837912/824905/828031/843353/838053/839950"| __truncated__ "820608/838719/843660/821832/828272/835391/821353/5007772/824681/839184/842190/841606/841798/827913/817828/83429"| __truncated__ "838508/818143/835032/839183/7922438/823751/843192/843126/841797/5007772/825366/839184/837098/824745/835855/8421"| __truncated__ ...
 $ Count      : int  239 193 101 150 80 109 115 113 115 83 ...
#...Citation
 T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu.
 clusterProfiler 4.0: A universal enrichment tool for interpreting omics data.
 The Innovation. 2021, 2(3):100141 
dotplot(s_eGO2,showCategory=20,font.size=10,label_format=70)+
scale_size_continuous(range=c(1, 7))+
theme_minimal() +
ggtitle("GO Enrichment of up im-regulated genes")
Scale for size is already present.
Adding another scale for size, which will replace the existing scale.

mlist<-list(s_eGO1,s_eGO2)
names(mlist)<-c("IMdwn-GOenrich","IMUP-GOenrich")
mresult<-merge_result(mlist)
dotplot(mresult,showCategory=10)

NA
NA
search_kegg_organism('ath', by='kegg_code')
search_kegg_organism('Arabidopsis thaliana', by='scientific_name')

# omit any NA values 
kegg_gene_list<-na.omit(upIM_genes)

# sort the list in decreasing order (required for clusterProfiler)
kegg_gene_list = sort(upIM_genes, decreasing = TRUE)


ora_analysis_kegg <- enrichKEGG(gene = kegg_gene_list,
                                universe =ALL_DEG_table,
                                organism = "ath",
                                keyType = "ncbi-geneid",
                                minGSSize = 10,
                                maxGSSize = 500,
                                pAdjustMethod = "BH",
                                qvalueCutoff = 0.01,
                                use_internal_data = FALSE) # force to query latest KEGG db
Reading KEGG annotation online: "https://rest.kegg.jp/link/ath/pathway"...
Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/ath"...
Reading KEGG annotation online: "https://rest.kegg.jp/conv/ncbi-geneid/ath"...
`universe` is not in character and will be ignored...
head(ora_analysis_kegg)

ora_analysis_kegg_modules <- enrichMKEGG(gene =kegg_gene_list,
                                         universe = ALL_DEG_table,
                                         organism = "ath",
                                         keyType = "ncbi-geneid",
                                         minGSSize = 10,           # minimal size of genes annotated by Ontology term for testing.
                                         maxGSSize = 500,          # maximal size of genes annotated for testing
                                         pAdjustMethod = "BH",
                                         qvalueCutoff = 0.01)
Reading KEGG annotation online: "https://rest.kegg.jp/link/ath/module"...
Reading KEGG annotation online: "https://rest.kegg.jp/list/module"...
Reading KEGG annotation online: "https://rest.kegg.jp/conv/ncbi-geneid/ath"...
`universe` is not in character and will be ignored...
ggplot(ora_analysis_kegg)

#browseKEGG(ora_analysis_kegg, 'ath00195')
library("pathview")
#ath04145 <- pathview(gene.data  = allOE_genes, pathway.id = "ath00195", species    = "ath")

# create a simple dotplot graph
dotplot(ora_analysis_kegg, 
    color = "qvalue", 
    showCategory = 5, 
    size = "Count")



ridgeplot(ora_analysis_kegg, showCategory = 30) + labs(title = "KEGG_Pathway Analysis", x = "enrichment distribution") + scale_fill_gradient(low = "#56B1F7", high = "#132B43")
Error in (function (classes, fdef, mtable)  : 
  unable to find an inherited method for function ‘ridgeplot’ for signature ‘"enrichResult"’
gGO2 <- groupGO(gene     = upIM_genes,
               OrgDb    = 'org.At.tair.db',
               ont      = "BP",
               level    = 3,
               readable = TRUE)

head(gGO2)
Hdwn<- read_table("downLFC1_0.01_H.csv", col_names = TRUE)

── Column specification ──────────────────────────────────────────────────────────
cols(
  Row.names = col_character(),
  UMVsIM_log2FoldChange = col_double(),
  UMVsIM_padj = col_double(),
  UMVsH_log2FoldChange = col_double(),
  UMVsH_padj = col_double()
)
###cleaning up table: remove columns, add column names, remove all genes with na values, and duplicate genes
Hdwn_tb <- Hdwn[ ,-c(2:3)] %>% 
  data.frame()  %>% 
  as_tibble() %>% 
  na.omit() %>%
  arrange(desc(UMVsH_log2FoldChange))

###add information to table
Hdwn_tb$symbol = mapIds(org.At.tair.db, 
                    keys= Hdwn_tb$Row.names, 
                    keytype= "TAIR" , 
                    column= "SYMBOL", 
                    multiVals= "first")
'select()' returned 1:many mapping between keys and columns
Hdwn_tb$entrez = mapIds(org.At.tair.db, 
                    keys=Hdwn_tb$Row.names, 
                    keytype="TAIR", 
                    column="ENTREZID", 
                    multiVals="first")
'select()' returned 1:1 mapping between keys and columns
Hdwn_genes<- Hdwn_tb$entrez
eGO3 <- enrichGO(gene         = Hdwn_genes,
                OrgDb         = org.At.tair.db,
                keyType       = 'ENTREZID',
                ont           = "BP",
                pAdjustMethod = "BH",
                qvalueCutoff  = 0.01)

#dotplot(eGO3, x = "GeneRatio", showCategory = 10,label_format = 30 , color = "p.adjust")+ ggtitle("Dotplot for eGO")
#goplot(eGO3, color = "p.adjust")
#barplot(eGO3, color = "p.adjust")

### Output results from GO analysis to a table
#DwnH_cluster_summary <- data.frame(eGO3)
#write.csv(DwnH_cluster_summary, "eGO3_dwnHclusterProfiler_oe.csv")

s_eGO3<-clusterProfiler::simplify(eGO3)
s_eGO3 #89 terms
#
# over-representation test
#
#...@organism    Arabidopsis thaliana 
#...@ontology    BP 
#...@keytype     ENTREZID 
#...@gene    chr [1:7678] "828907" "830731" "819671" "826474" "827337" "825529" "839669" ...
#...pvalues adjusted by 'BH' with cutoff <0.05 
#...104 enriched terms found
'data.frame':   104 obs. of  9 variables:
 $ ID         : chr  "GO:0015979" "GO:0009657" "GO:0033013" "GO:0009642" ...
 $ Description: chr  "photosynthesis" "plastid organization" "tetrapyrrole metabolic process" "response to light intensity" ...
 $ GeneRatio  : chr  "197/7433" "201/7433" "175/7433" "193/7433" ...
 $ BgRatio    : chr  "257/25585" "323/25585" "323/25585" "385/25585" ...
 $ pvalue     : num  1.45e-56 1.63e-35 1.96e-21 1.69e-18 8.36e-18 ...
 $ p.adjust   : num  3.21e-53 1.20e-32 7.26e-19 5.34e-16 2.32e-15 ...
 $ qvalue     : num  2.43e-53 9.12e-33 5.49e-19 4.04e-16 1.75e-15 ...
 $ geneID     : chr  "836059/829911/841853/829266/842789/843989/838447/837479/822416/826599/835328/828893/825716/840099/839350/842072"| __truncated__ "828912/829266/818253/842789/840038/819740/829506/817170/837198/827505/830913/816846/837583/832756/841637/835165"| __truncated__ "831378/829266/819059/831225/818552/823878/820143/821463/826957/836824/817379/820686/833542/839621/829322/841162"| __truncated__ "825432/829911/826093/838879/837555/817170/838447/843737/825286/830756/817857/826914/843729/842760/838071/825497"| __truncated__ ...
 $ Count      : int  197 201 175 193 176 94 103 106 131 67 ...
#...Citation
 T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu.
 clusterProfiler 4.0: A universal enrichment tool for interpreting omics data.
 The Innovation. 2021, 2(3):100141 
dotplot(s_eGO3,showCategory=20,font.size=10,label_format=70)+
scale_size_continuous(range=c(1, 7))+
theme_minimal() +
ggtitle("GO Enrichment of down im-regulated genes")
Scale for size is already present.
Adding another scale for size, which will replace the existing scale.

gGO3 <- groupGO(gene     = Hdwn_genes,
               OrgDb    = 'org.At.tair.db',
               ont      = "BP",
               level    = 3,
               readable = TRUE)

head(gGO3)
search_kegg_organism('ath', by='kegg_code')
search_kegg_organism('Arabidopsis thaliana', by='scientific_name')

# omit any NA values 
kegg_gene_list<-na.omit(Hdwn_genes)

# sort the list in decreasing order (required for clusterProfiler)
kegg_gene_list = sort(Hdwn_genes, decreasing = TRUE)


ora_analysis_kegg <- enrichKEGG(gene = kegg_gene_list,
                                universe =ALL_DEG_table,
                                organism = "ath",
                                keyType = "ncbi-geneid",
                                minGSSize = 10,
                                maxGSSize = 500,
                                pAdjustMethod = "BH",
                                qvalueCutoff = 0.01,
                                use_internal_data = FALSE) # force to query latest KEGG db
Reading KEGG annotation online: "https://rest.kegg.jp/link/ath/pathway"...
Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/ath"...
Reading KEGG annotation online: "https://rest.kegg.jp/conv/ncbi-geneid/ath"...
`universe` is not in character and will be ignored...
head(ora_analysis_kegg)

ora_analysis_kegg_modules <- enrichMKEGG(gene =kegg_gene_list,
                                         universe = ALL_DEG_table,
                                         organism = "ath",
                                         keyType = "ncbi-geneid",
                                         minGSSize = 10,           # minimal size of genes annotated by Ontology term for testing.
                                         maxGSSize = 500,          # maximal size of genes annotated for testing
                                         pAdjustMethod = "BH",
                                         qvalueCutoff = 0.01)
Reading KEGG annotation online: "https://rest.kegg.jp/link/ath/module"...
Reading KEGG annotation online: "https://rest.kegg.jp/list/module"...
Reading KEGG annotation online: "https://rest.kegg.jp/conv/ncbi-geneid/ath"...
`universe` is not in character and will be ignored...
ggplot(ora_analysis_kegg)

#browseKEGG(ora_analysis_kegg, 'ath00195')
library("pathview")
#ath04145 <- pathview(gene.data  = allOE_genes, pathway.id = "ath00195", species    = "ath")

# create a simple dotplot graph
dotplot(ora_analysis_kegg, 
    color = "qvalue", 
    showCategory = 5, 
    size = "Count")



ridgeplot(ora_analysis_kegg, showCategory = 30) + labs(title = "KEGG_Pathway Analysis", x = "enrichment distribution") + scale_fill_gradient(low = "#56B1F7", high = "#132B43")
Error in (function (classes, fdef, mtable)  : 
  unable to find an inherited method for function ‘ridgeplot’ for signature ‘"enrichResult"’
Hup<- read_table( "upLFC1_0.01_H.csv", col_names = TRUE)

###cleaning up table
Hup_tb <- Hup[ ,-c(2:3)] %>% 
  data.frame()  %>% 
  as_tibble() %>% 
  na.omit() %>%
  arrange(desc(UMVsH_log2FoldChange))

###add information to table
Hup_tb$symbol = mapIds(org.At.tair.db, 
                    keys= Hup_tb$Row.names, 
                    keytype= "TAIR" , 
                    column= "SYMBOL", 
                    multiVals= "first")
Hup_tb$entrez = mapIds(org.At.tair.db, 
                    keys=Hup_tb$Row.names, 
                    keytype="TAIR", 
                    column="ENTREZID", 
                    multiVals="first")
upH_genes<- Hup_tb$entrez
eGO4 <- enrichGO(gene         = upH_genes,
                OrgDb         = org.At.tair.db,
                keyType       = 'ENTREZID',
                ont           = "BP",
                pAdjustMethod = "BH",
                pvalueCutoff  = 0.01,
                qvalueCutoff  = 0.01)

#dotplot(eGO4, x = "GeneRatio", showCategory = 10,label_format = 30 , color = "p.adjust")+ ggtitle("Dotplot for eGO")
#goplot(eGO4, color = "p.adjust")
#barplot(eGO4, color = "p.adjust")

### Output results from GO analysis to a table
#upH_cluster_summary <- data.frame(eGO4)
#write.csv(upH_cluster_summary, "eGO4_upHclusterProfiler_oe.csv")

s_eGO4<-clusterProfiler::simplify(eGO4)
s_eGO4 #89 terms
#
# over-representation test
#
#...@organism    Arabidopsis thaliana 
#...@ontology    BP 
#...@keytype     ENTREZID 
#...@gene    chr [1:3586] "816469" "825182" "816699" "836823" "836802" "827212" "827113" ...
#...pvalues adjusted by 'BH' with cutoff <0.01 
#...89 enriched terms found
'data.frame':   89 obs. of  9 variables:
 $ ID         : chr  "GO:0009751" "GO:0090693" "GO:0002237" "GO:0001666" ...
 $ Description: chr  "response to salicylic acid" "plant organ senescence" "response to molecule of bacterial origin" "response to hypoxia" ...
 $ GeneRatio  : chr  "250/3473" "200/3473" "113/3473" "148/3473" ...
 $ BgRatio    : chr  "427/25585" "372/25585" "153/25585" "342/25585" ...
 $ pvalue     : num  4.78e-107 1.58e-76 8.16e-65 8.61e-42 2.14e-40 ...
 $ p.adjust   : num  9.42e-104 1.56e-73 4.02e-62 3.40e-39 5.27e-38 ...
 $ qvalue     : num  7.86e-104 1.30e-73 3.35e-62 2.83e-39 4.40e-38 ...
 $ geneID     : chr  "843660/821135/838122/842988/833667/837912/820307/838719/820608/841609/826642/828031/826708/834101/839184/838053"| __truncated__ "818171/829015/818143/843192/843353/837912/820307/838719/824905/819184/827238/826642/828031/818902/821706/839184"| __truncated__ "843660/824681/821832/838719/820608/835391/839184/828420/5007772/841606/821353/841798/816384/828272/822222/81760"| __truncated__ "818143/835032/843192/838508/823751/836072/827238/837098/824745/839184/7922438/835855/5007772/841797/839183/8399"| __truncated__ ...
 $ Count      : int  250 200 113 148 85 129 108 131 126 99 ...
#...Citation
 T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu.
 clusterProfiler 4.0: A universal enrichment tool for interpreting omics data.
 The Innovation. 2021, 2(3):100141 
dotplot(s_eGO4,showCategory=20,font.size=10,label_format=70)+
scale_size_continuous(range=c(1, 7))+
theme_minimal() +
ggtitle("GO Enrichment of down im-regulated genes")
Scale for size is already present.
Adding another scale for size, which will replace the existing scale.

               
gGO4 <- groupGO(gene     = upH_genes,
               OrgDb    = 'org.At.tair.db',
               ont      = "BP",
               level    = 3,
               readable = TRUE)

#head(gGO4)
search_kegg_organism('ath', by='kegg_code')
search_kegg_organism('Arabidopsis thaliana', by='scientific_name')

# omit any NA values 
kegg_gene_list<-na.omit(upH_genes)

# sort the list in decreasing order (required for clusterProfiler)
kegg_gene_list = sort(upH_genes, decreasing = TRUE)


ora_analysis_kegg <- enrichKEGG(gene = kegg_gene_list,
                                universe =ALL_DEG_table,
                                organism = "ath",
                                keyType = "ncbi-geneid",
                                minGSSize = 10,
                                maxGSSize = 500,
                                pAdjustMethod = "BH",
                                qvalueCutoff = 0.01,
                                use_internal_data = FALSE) # force to query latest KEGG db
Reading KEGG annotation online: "https://rest.kegg.jp/link/ath/pathway"...
Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/ath"...
Reading KEGG annotation online: "https://rest.kegg.jp/conv/ncbi-geneid/ath"...
`universe` is not in character and will be ignored...
head(ora_analysis_kegg)

ora_analysis_kegg_modules <- enrichMKEGG(gene =kegg_gene_list,
                                         universe = ALL_DEG_table,
                                         organism = "ath",
                                         keyType = "ncbi-geneid",
                                         minGSSize = 10,           # minimal size of genes annotated by Ontology term for testing.
                                         maxGSSize = 500,          # maximal size of genes annotated for testing
                                         pAdjustMethod = "BH",
                                         qvalueCutoff = 0.01)
Reading KEGG annotation online: "https://rest.kegg.jp/link/ath/module"...
Reading KEGG annotation online: "https://rest.kegg.jp/list/module"...
Reading KEGG annotation online: "https://rest.kegg.jp/conv/ncbi-geneid/ath"...
`universe` is not in character and will be ignored...
ggplot(ora_analysis_kegg)

#browseKEGG(ora_analysis_kegg, 'ath00195')
library("pathview")
#ath04145 <- pathview(gene.data  = allOE_genes, pathway.id = "ath00195", species    = "ath")

# create a simple dotplot graph
dotplot(ora_analysis_kegg, 
    color = "qvalue", 
    showCategory = 5, 
    size = "Count")



ridgeplot(ora_analysis_kegg, showCategory = 30) + labs(title = "KEGG_Pathway Analysis", x = "enrichment distribution") + scale_fill_gradient(low = "#56B1F7", high = "#132B43")
Error in (function (classes, fdef, mtable)  : 
  unable to find an inherited method for function ‘ridgeplot’ for signature ‘"enrichResult"’
mlist<-list(s_eGO4,s_eGO3)
names(mlist)<-c("upH-GOenrich","DwnH-GOenrich")
mresult<-merge_result(mlist)
dotplot(mresult,showCategory=10)

library(gplots)
library(VennDiagram)
library(ggvenn)
 
# Create a list of sets for the Venn diagram
venn_list <- list(
  dwnIM = ego1$Description,
  upIM = eGO2$Description,
  hdwn = eGO3$Description,
  hUP = eGO4$Description
)
ggvenn(venn_list, show_elements = T, label_sep = "\n",text_size= 4,
  stroke_size = 0.5, set_name_size = 4)


list_df<- (list_to_data_frame(venn_list))

library(DT)
DT::datatable(list_df)


 
# Create a list of sets for the Venn diagram
#venn_list <- list(
dwnIM <- c(ego1$Description[1:10])
upIM <- c(eGO2$Description[1:10])
hdwn <- c(eGO3$Description[1:10])
hUP <- c(eGO4$Description[1:10])

venn_list<- list(dwnIM,upIM,hdwn,hUP)
view(venn_list)
ggvenn(venn_list, c("dwnIM", "upIM", "hdwn", "hUP"), show_elements = T, label_sep = "\n",text_size= 4,
  stroke_size = 0.5, set_name_size = 4)


list_df<- (list_to_data_frame(venn_list))

library(DT)
DT::datatable(list_df)
#Go Enriched DOWN
mlist<-list(s_eGO1,s_eGO3)
names(mlist)<-c("IMdwn-GOenrich","DwnH-GOenrich")
mresult1<-merge_result(mlist)
dotplot(mresult1,showCategory=10)


mlist<-list(s_eGO4,s_eGO2)
names(mlist)<-c("upH-GOenrich","IMup-GOenrich")
mresult2<-merge_result(mlist)
dotplot(mresult2,showCategory=10)

library(patchwork)
p1<-barplot(eGO1, plot_type="bar")
p2<-barplot(eGO2, plot_type="bar", term_metric="GeneRatio", stats_metric="pvalue")
p3<-barplot(eGO3, plot_type="bar", up_color="#E69056", down_color ="#325CAC")
p4<-barplot(eGO4, plot_type="bar", wrap_length=25)
p1+p2+p3+p4+plot_annotation(tag_levels = "A")

library(patchwork)
p1<-barplot(s_eGO1, plot_type="bar")
p2<-barplot(s_eGO2, plot_type="bar", term_metric="GeneRatio", stats_metric="pvalue")
p3<-barplot(s_eGO3, plot_type="bar", up_color="#E69056", down_color ="#325CAC")
p4<-barplot(s_eGO4, plot_type="bar", wrap_length=25)
p1+p2+p3+p4+plot_annotation(tag_levels = "A")


library(patchwork)
p1<-dotplot(eGO1,  showCategory = 5,  size = "Count")
p2<-dotplot(eGO2,  
    showCategory = 5, 
    size = "Count")
p3<-dotplot(eGO3,  
    showCategory = 5, 
    size = "Count")
p4<-dotplot(eGO4, 
    showCategory = 5, 
    size = "Count")
p1+p2+p3+p4+plot_annotation(tag_levels="A")

library(patchwork)
p1<-goplot(eGO1, showCategory = 2, size = "Count")
p2<-goplot(eGO2, showCategory = 2, size = "Count")
p3<-goplot(eGO3, showCategory = 2, size = "Count")
p4<-goplot(eGO4, showCategory = 2, size = "Count")
p1+p2+p3+p4+plot_annotation(tag_levels="A")

library(DOSE)
#enrich go plot
#pairwise termsim gets the similarity matrix, method of calculating the similarity between nodes, one of "Resnik", "Lin", "Rel", "Jiang" , "Wang" and "JC"(Jaccard similarity coefficient) methods
pwt <- pairwise_termsim(ALL_DEG_table,
                        method = "JC",
                        semData = NULL,
                        showCategory = 50)
emapplot(pwt, showCategory = 10, color = "qvalue")
p1 <- treeplot(pwt,) #The default agglomeration method in treeplot() is ward.D
p2 <- treeplot(pwt, hclust_method = "average")
aplot::plot_list(p1, p2, tag_levels='A')

#Enrichment map organises enriched terms into a network with edges weighted by the ratio of overlapping gene sets. Mutually overlapping gene sets are tending to cluster together, making it easy to identify functional modules.

p2 <- emapplot(pwt, showCategory = 10, cex_category=1.5)
p3 <- emapplot(pwt, showCategory = 10,layout="kk")
p4 <- emapplot(pwt, showCategory = 10, cex_category=1.5, layout="kk") 
cowplot::plot_grid( p2, p3, p4, ncol=2, labels=LETTERS[1:4])
library(DOSE)
## To color genes by log2 fold changes, we need to extract the log2 fold changes 
OE_foldchanges <- ALL_DEG_table$log2FoldChange
names(OE_foldchanges) <- ALL_DEG_table$gene

## Cnetplot details the genes associated with one or more terms
cnetplot(ALL_DEG_table,
         categorySize="pvalue",
         showCategory = 5,
         foldChange=OE_foldchanges,
         vertex.label.font=6)


OE_foldchanges <- ifelse(OE_foldchanges > 1, 1, OE_foldchanges)
OE_foldchanges <- ifelse(OE_foldchanges < -1, -1, OE_foldchanges)
cnetplot(ALL_DEG_table,
         categorySize="pvalue",
         showCategory = 5,
         foldChange=OE_foldchanges,
         vertex.label.font=6)
heatplot(ALL_DEG_table, showCategory = 5) #relationship between selected genes and corresponding biological concepts.
library(KEGGREST)
pathways <- keggList("pathway")
head(pathways)

library(pathview)
map <- gsub("ath04814", "", names(pathways)[2])  # remove 'path:'
pv.out <- pathview(gene.data = ALL_DEG_table, cpd.data = ALL_DEG_table, gene.idtype = "KEGG", 
    pathway.id = map, species = "ath", out.suffix = map, keys.align = "y", 
    kegg.native = T, match.data = T, key.pos = "topright")
plot.name <- paste(map, map, "png", sep = ".")
---
title: "3_Functional_Analysis"
output: html_notebook
---

This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code. 

Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Cmd+Shift+Enter*. 

```{r}
### INSTALLING PACKAGES ###
#if (!require("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")

#BiocManager::install(c("readr", "enrichplot", "DOSE", "clusterProfiler", "AnnotationHub", "ensembldb", "tidyverse", "AnnotationDbi", "org.At.tair.db", "pathview", "gageData", "ggupset", "GOCompare", "UpSetR"))

###Loading Library
suppressPackageStartupMessages(library("org.At.tair.db"))
#suppressPackageStartupMessages(library(DOSE)) #for human? Disease Ontology Semantic and Enrichment analysis
#suppressPackageStartupMessages(library(pathview)) #calls specific pathway
library(clusterProfiler) #A universal enrichment tool for interpreting omics data #prettydoc
library(GOSemSim)
#library(AnnotationHub)
#library(ensembldb)
library(readr)
#library(ggupset)
# Loading relevant libraries 
library(tidyverse) # includes ggplot2, for data visualisation. dplyr, for data manipulation.
#library(RColorBrewer) # for a colourful plot
#library(pheatmap)
#library(enrichplot) # for visualisations
##library(ggupset) # for visualisations
#library(AnnotationDbi)
#library(org.At.tair.db)
#class(org.At.tair.db)
#columns(org.At.tair.db)
library(VennDiagram)
library(ggvenn)
library(readr)

setwd("~/Desktop/Lab/Working_Projects/trapseq_2023")
# Clean environment
rm(list = ls(all.names = TRUE)) # will clear all objects including hidden objects
gc() # free up memory and report the memory usage
options(max.print = .Machine$integer.max, scipen = 0, stringsAsFactors = F, dplyr.summarise.inform = F) # avoid truncated output in R console and scientific notation

```

```{r read in all diff genes}
df <- read.csv(paste0(file = "deg_analysis_DESeq_all.csv"), row.names = 1)
ALL_DEG <- data.frame(df, row.names = df$row )
geneID<- ALL_DEG$row
```

```{r}
## Prepare deg results -----------------------------------------------
ALL_DEG <- ALL_DEG %>% mutate(diffexpressed = case_when(
  log2FoldChange > 1 & padj < 0.01 ~ 'UP',
  log2FoldChange < 1 & padj < 0.01 ~ 'DOWN',
  padj > 0.01 ~ 'NO'
))

# Remove non-significant genes
ALL_DEG_table <- ALL_DEG[ALL_DEG$diffexpressed != 'NO', ]

# Substitute names so they are annotated nicely in the heatmap later
unique(ALL_DEG_table$diffexpressed)

# Split the dataframe into a list of sub-dataframes: upregulated, downregulated genes
deg_results_list <- split(ALL_DEG_table, ALL_DEG_table$diffexpressed)

colnames(ALL_DEG_table)

###add information to table
ALL_DEG_table$symbol = mapIds(org.At.tair.db, 
                    keys= row.names(ALL_DEG_table), 
                    keytype= "TAIR" , 
                    column= "SYMBOL", 
                    multiVals= "first")

ALL_DEG_table$entrez = mapIds(org.At.tair.db, 
                    keys=row.names(ALL_DEG_table), 
                    keytype="TAIR", 
                    column="ENTREZID", 
                    multiVals="first")


```

```{r}
library(GOSemSim)
atGO <- godata('org.At.tair.db', ont="BP")
library(clusterProfiler)
```

```{r}
eGO <- enrichGO(gene         = ALL_DEG_table$entrez,
                OrgDb         = org.At.tair.db,
                keyType       = 'ENTREZID',
                ont           = "BP",
                pAdjustMethod = "BH",
                pvalueCutoff  = 0.01,
                qvalueCutoff  = 0.01)
head(eGO, 10)
#goplot(eGO)
#barplot(eGO)
##dotplot(eGO, showCategory=10, color = "p.adjust") + ggtitle("Dotplot for ORA")

### Output results from GO analysis to a table
#cluster_summary <- data.frame(eGO_dwnIM)
#write.csv(cluster_summary, "eGO_dwnIMclusterProfiler_oe.csv")
```

```{r}
               
gGO <- groupGO(gene     = ALL_DEG_table$entrez,
               OrgDb    = 'org.At.tair.db',
               ont      = "BP",
               level    = 3,
               readable = TRUE)

head(gGO)
```

```{r DOWN REGULATED INFECTED MESOPHYLL}
library(readr)
IMdwn<- read_table("dwnLFC1_01_IM.csv", col_names = TRUE) 

###clean up table: remove columns, add column names, remove all genes with na values, and duplicate genes
IMdwn_tb <- IMdwn[ ,-c(4:5)] %>% 
  data.frame()  %>% 
  as_tibble() %>% 
  na.omit() %>%
  arrange(desc(UMVsIM_log2FoldChange))

###add information to table
IMdwn_tb$symbol = mapIds(org.At.tair.db, 
                    keys= IMdwn_tb$Row.names, 
                    keytype= "TAIR" , 
                    column= "SYMBOL", 
                    multiVals= "first")
IMdwn_tb$entrez = mapIds(org.At.tair.db, 
                    keys=IMdwn_tb$Row.names, 
                    keytype="TAIR", 
                    column="ENTREZID", 
                    multiVals="first")
dwnIM_genes<- IMdwn_tb$entrez
```


```{r GO enriched INFECTED MESOPHYLL}
#GO enrichment refers specifically to gene ontology. GO provides the framework and set of concepts for describing the functions of gene products from all organisms.GO integrates information about gene product function in the context of three domains:
eGO1 <- enrichGO(gene         = dwnIM_genes,
                OrgDb         = org.At.tair.db,
                keyType       = 'ENTREZID',
                ont           = "BP",
                pAdjustMethod = "BH",
                qvalueCutoff  = 0.01)
eGO1 # 243 enriched terms found

#dotplot(eGO1, x = "GeneRatio", showCategory = 10,label_format = 30 , color = "p.adjust")+ ggtitle("Dotplot for eGO")
#goplot(eGO1 , color = "p.adjust")

### Output results from GO analysis to a table
#cluster_summary <- data.frame(eGO_dwnIM)
#write.csv(cluster_summary, "eGO_dwnIMclusterProfiler_oe.csv")


###Reduce redundancy
#Bc the nature of GO, it is organized as a directed acyclic graph, parent terms can show enrichment due to over rep child terms. 
# to fix use simplify() function 
s_eGO1<-clusterProfiler::simplify(eGO1)
s_eGO1 #89 terms
dotplot(s_eGO1,showCategory=20,font.size=10,label_format=70)+
scale_size_continuous(range=c(1, 7))+
theme_minimal() +
ggtitle("GO Enrichment of down im-regulated genes")
```

```{r IM DWN CNET Plot (in progress)}
# color genes by log2 fold changes; create named vector
foldchanges <- IMdwn_tb$UMVsIM_log2FoldChange
names(foldchanges) <- IMdwn_tb$symbol
## by default cnetplot gives the top 5 significant terms
#if we want to focus on specific terms we can subset our results
s_ego1loli <- s_eGO1
#s_ego1loli@result<-s_eGO1[c(6,15,32),]
cnetplot(s_ego1loli, color.params = list(foldChange = foldchanges),
color_category="purple")
```

```{r IM DOWN GROUP GO}
ggo1 <- groupGO(gene= dwnIM_genes,
               OrgDb= 'org.At.tair.db',
               ont= "BP",
               level = 3,
               keyType = "ENTREZID",
               readable= TRUE)
head(ggo1)
```


```{r IM DWN cenet plot GROUP GO in progress}
cnetplot(ggo1, showCategory=5,
                      categorySize="pvalue",
                      color.params = list(foldChange = IMdwn_tb$UMVsIM_log2FoldChange),
                      order=TRUE)


```

```{r IM dwn KEGG}
search_kegg_organism('ath', by='kegg_code')
search_kegg_organism('Arabidopsis thaliana', by='scientific_name')

# omit any NA values 
kegg_gene_list<-na.omit(dwnIM_genes)

# sort the list in decreasing order (required for clusterProfiler)
kegg_gene_list = sort(dwnIM_genes, decreasing = TRUE)

#ora with kegg data base
ora_analysis_kegg <- enrichKEGG(gene = kegg_gene_list,
                                universe =ALL_DEG_table,
                                organism = "ath",
                                keyType = "ncbi-geneid",
                                minGSSize = 10,
                                maxGSSize = 500,
                                pAdjustMethod = "BH",
                                qvalueCutoff = 0.01,
                                use_internal_data = FALSE) # force to query latest KEGG db
head(ora_analysis_kegg)

ora_analysis_kegg_modules <- enrichMKEGG(gene =kegg_gene_list,
                                         universe = ALL_DEG_table,
                                         organism = "ath",
                                         keyType = "ncbi-geneid",
                                         minGSSize = 10,           # minimal size of genes annotated by Ontology term for testing.
                                         maxGSSize = 500,          # maximal size of genes annotated for testing
                                         pAdjustMethod = "BH",
                                         qvalueCutoff = 0.01)
ggplot(ora_analysis_kegg)
#browseKEGG(ora_analysis_kegg, 'ath00195')
library("pathview")
#ath04145 <- pathview(gene.data  = allOE_genes, pathway.id = "ath00195", species    = "ath")

# create a simple dotplot graph
dotplot(ora_analysis_kegg, 
    color = "qvalue", 
    showCategory = 5, 
    size = "Count")


ridgeplot(ora_analysis_kegg, showCategory = 30) + labs(title = "KEGG_Pathway Analysis", x = "enrichment distribution") + scale_fill_gradient(low = "#56B1F7", high = "#132B43")

```

```{r GO vs KEGG}
mlist<-list(s_eGO1,ora_analysis_kegg)
names(mlist)<-c("GO-enrich","KEGG-enrich")
mresult<-merge_result(mlist)
dotplot(mresult,showCategory=5)

```


```{r UP REGULATED INFECTED MESOPHYLL}
IMup<- read_table("upLFC1_01_IM.csv",  col_names = TRUE)

###cleaning up table
IMup_tb <- IMup[ ,-c(4:5)] %>% 
  data.frame()  %>% 
  as_tibble() %>% 
  na.omit() %>%
  arrange(desc(UMVsIM_log2FoldChange))

###add information to table
IMup_tb$symbol = mapIds(org.At.tair.db, 
                    keys= IMup_tb$Row.names, 
                    keytype= "TAIR" , 
                    column= "SYMBOL", 
                    multiVals= "first")

IMup_tb$entrez = mapIds(org.At.tair.db, 
                    keys=IMup_tb$Row.names, 
                    keytype="TAIR", 
                    column="ENTREZID", 
                    multiVals="first")
upIM_genes<- IMup_tb$entrez
```


```{r UP REG IM ENRICHED GO }
eGO2 <- enrichGO(gene         = upIM_genes,
                OrgDb         = org.At.tair.db,
                keyType       = 'ENTREZID',
                ont           = "BP",
                pAdjustMethod = "BH",
                qvalueCutoff  = 0.01)

#dotplot(eGO2, x = "GeneRatio", showCategory = 10,label_format = 30 , color = "p.adjust")+ ggtitle("Dotplot for eGO")
#goplot(eGO2, color = "p.adjust")
#barplot(eGO2, color = "p.adjust")

### Output results from GO analysis to a table
#upIM_cluster_summary <- data.frame(eGO2)
#write.csv(upIM_cluster_summary, "eGO2IMclusterProfiler_oe.csv")

###Reduce redundancy
s_eGO2<-clusterProfiler::simplify(eGO2)
s_eGO2 #89 terms
dotplot(s_eGO2,showCategory=20,font.size=10,label_format=70)+
scale_size_continuous(range=c(1, 7))+
theme_minimal() +
ggtitle("GO Enrichment of up im-regulated genes")

mlist<-list(s_eGO1,s_eGO2)
names(mlist)<-c("IMdwn-GOenrich","IMUP-GOenrich")
mresult<-merge_result(mlist)
dotplot(mresult,showCategory=10)
```

```{r}
search_kegg_organism('ath', by='kegg_code')
search_kegg_organism('Arabidopsis thaliana', by='scientific_name')

# omit any NA values 
kegg_gene_list<-na.omit(upIM_genes)

# sort the list in decreasing order (required for clusterProfiler)
kegg_gene_list = sort(upIM_genes, decreasing = TRUE)


ora_analysis_kegg <- enrichKEGG(gene = kegg_gene_list,
                                universe =ALL_DEG_table,
                                organism = "ath",
                                keyType = "ncbi-geneid",
                                minGSSize = 10,
                                maxGSSize = 500,
                                pAdjustMethod = "BH",
                                qvalueCutoff = 0.01,
                                use_internal_data = FALSE) # force to query latest KEGG db
head(ora_analysis_kegg)

ora_analysis_kegg_modules <- enrichMKEGG(gene =kegg_gene_list,
                                         universe = ALL_DEG_table,
                                         organism = "ath",
                                         keyType = "ncbi-geneid",
                                         minGSSize = 10,           # minimal size of genes annotated by Ontology term for testing.
                                         maxGSSize = 500,          # maximal size of genes annotated for testing
                                         pAdjustMethod = "BH",
                                         qvalueCutoff = 0.01)
ggplot(ora_analysis_kegg)
#browseKEGG(ora_analysis_kegg, 'ath00195')
library("pathview")
#ath04145 <- pathview(gene.data  = allOE_genes, pathway.id = "ath00195", species    = "ath")

# create a simple dotplot graph
dotplot(ora_analysis_kegg, 
    color = "qvalue", 
    showCategory = 5, 
    size = "Count")


ridgeplot(ora_analysis_kegg, showCategory = 30) + labs(title = "KEGG_Pathway Analysis", x = "enrichment distribution") + scale_fill_gradient(low = "#56B1F7", high = "#132B43")

```

```{r}
gGO2 <- groupGO(gene     = upIM_genes,
               OrgDb    = 'org.At.tair.db',
               ont      = "BP",
               level    = 3,
               readable = TRUE)

head(gGO2)
```


```{r DOWN REGULATED HAUST CELLS}
Hdwn<- read_table("downLFC1_0.01_H.csv", col_names = TRUE)

###cleaning up table: remove columns, add column names, remove all genes with na values, and duplicate genes
Hdwn_tb <- Hdwn[ ,-c(2:3)] %>% 
  data.frame()  %>% 
  as_tibble() %>% 
  na.omit() %>%
  arrange(desc(UMVsH_log2FoldChange))

###add information to table
Hdwn_tb$symbol = mapIds(org.At.tair.db, 
                    keys= Hdwn_tb$Row.names, 
                    keytype= "TAIR" , 
                    column= "SYMBOL", 
                    multiVals= "first")

Hdwn_tb$entrez = mapIds(org.At.tair.db, 
                    keys=Hdwn_tb$Row.names, 
                    keytype="TAIR", 
                    column="ENTREZID", 
                    multiVals="first")
Hdwn_genes<- Hdwn_tb$entrez
```


```{r DOWN REGULATED HAUST CELLS GO}
eGO3 <- enrichGO(gene         = Hdwn_genes,
                OrgDb         = org.At.tair.db,
                keyType       = 'ENTREZID',
                ont           = "BP",
                pAdjustMethod = "BH",
                qvalueCutoff  = 0.01)

#dotplot(eGO3, x = "GeneRatio", showCategory = 10,label_format = 30 , color = "p.adjust")+ ggtitle("Dotplot for eGO")
#goplot(eGO3, color = "p.adjust")
#barplot(eGO3, color = "p.adjust")

### Output results from GO analysis to a table
#DwnH_cluster_summary <- data.frame(eGO3)
#write.csv(DwnH_cluster_summary, "eGO3_dwnHclusterProfiler_oe.csv")

s_eGO3<-clusterProfiler::simplify(eGO3)
s_eGO3 #89 terms
dotplot(s_eGO3,showCategory=20,font.size=10,label_format=70)+
scale_size_continuous(range=c(1, 7))+
theme_minimal() +
ggtitle("GO Enrichment of down im-regulated genes")
```


```{r}
gGO3 <- groupGO(gene     = Hdwn_genes,
               OrgDb    = 'org.At.tair.db',
               ont      = "BP",
               level    = 3,
               readable = TRUE)

head(gGO3)
```

```{r}
search_kegg_organism('ath', by='kegg_code')
search_kegg_organism('Arabidopsis thaliana', by='scientific_name')

# omit any NA values 
kegg_gene_list<-na.omit(Hdwn_genes)

# sort the list in decreasing order (required for clusterProfiler)
kegg_gene_list = sort(Hdwn_genes, decreasing = TRUE)


ora_analysis_kegg <- enrichKEGG(gene = kegg_gene_list,
                                universe =ALL_DEG_table,
                                organism = "ath",
                                keyType = "ncbi-geneid",
                                minGSSize = 10,
                                maxGSSize = 500,
                                pAdjustMethod = "BH",
                                qvalueCutoff = 0.01,
                                use_internal_data = FALSE) # force to query latest KEGG db
head(ora_analysis_kegg)

ora_analysis_kegg_modules <- enrichMKEGG(gene =kegg_gene_list,
                                         universe = ALL_DEG_table,
                                         organism = "ath",
                                         keyType = "ncbi-geneid",
                                         minGSSize = 10,           # minimal size of genes annotated by Ontology term for testing.
                                         maxGSSize = 500,          # maximal size of genes annotated for testing
                                         pAdjustMethod = "BH",
                                         qvalueCutoff = 0.01)
ggplot(ora_analysis_kegg)
#browseKEGG(ora_analysis_kegg, 'ath00195')
library("pathview")
#ath04145 <- pathview(gene.data  = allOE_genes, pathway.id = "ath00195", species    = "ath")

# create a simple dotplot graph
dotplot(ora_analysis_kegg, 
    color = "qvalue", 
    showCategory = 5, 
    size = "Count")


ridgeplot(ora_analysis_kegg, showCategory = 30) + labs(title = "KEGG_Pathway Analysis", x = "enrichment distribution") + scale_fill_gradient(low = "#56B1F7", high = "#132B43")

```

```{r UP REG Haustoriated Cells}
Hup<- read_table( "upLFC1_0.01_H.csv", col_names = TRUE)

###cleaning up table
Hup_tb <- Hup[ ,-c(2:3)] %>% 
  data.frame()  %>% 
  as_tibble() %>% 
  na.omit() %>%
  arrange(desc(UMVsH_log2FoldChange))

###add information to table
Hup_tb$symbol = mapIds(org.At.tair.db, 
                    keys= Hup_tb$Row.names, 
                    keytype= "TAIR" , 
                    column= "SYMBOL", 
                    multiVals= "first")
Hup_tb$entrez = mapIds(org.At.tair.db, 
                    keys=Hup_tb$Row.names, 
                    keytype="TAIR", 
                    column="ENTREZID", 
                    multiVals="first")
upH_genes<- Hup_tb$entrez
```


```{r UP REG Haustoriated Cells GO}
eGO4 <- enrichGO(gene         = upH_genes,
                OrgDb         = org.At.tair.db,
                keyType       = 'ENTREZID',
                ont           = "BP",
                pAdjustMethod = "BH",
                qvalueCutoff  = 0.01)

#dotplot(eGO4, x = "GeneRatio", showCategory = 10,label_format = 30 , color = "p.adjust")+ ggtitle("Dotplot for eGO")
#goplot(eGO4, color = "p.adjust")
#barplot(eGO4, color = "p.adjust")

### Output results from GO analysis to a table
#upH_cluster_summary <- data.frame(eGO4)
#write.csv(upH_cluster_summary, "eGO4_upHclusterProfiler_oe.csv")

s_eGO4<-clusterProfiler::simplify(eGO4)
s_eGO4 #89 terms
dotplot(s_eGO4,showCategory=20,font.size=10,label_format=70)+
scale_size_continuous(range=c(1, 7))+
theme_minimal() +
ggtitle("GO Enrichment of down im-regulated genes")
               
gGO4 <- groupGO(gene     = upH_genes,
               OrgDb    = 'org.At.tair.db',
               ont      = "BP",
               level    = 3,
               readable = TRUE)

#head(gGO4)
```

```{r}
search_kegg_organism('ath', by='kegg_code')
search_kegg_organism('Arabidopsis thaliana', by='scientific_name')

# omit any NA values 
kegg_gene_list<-na.omit(upH_genes)

# sort the list in decreasing order (required for clusterProfiler)
kegg_gene_list = sort(upH_genes, decreasing = TRUE)


ora_analysis_kegg <- enrichKEGG(gene = kegg_gene_list,
                                universe =ALL_DEG_table,
                                organism = "ath",
                                keyType = "ncbi-geneid",
                                minGSSize = 10,
                                maxGSSize = 500,
                                pAdjustMethod = "BH",
                                qvalueCutoff = 0.01,
                                use_internal_data = FALSE) # force to query latest KEGG db
head(ora_analysis_kegg)

ora_analysis_kegg_modules <- enrichMKEGG(gene =kegg_gene_list,
                                         universe = ALL_DEG_table,
                                         organism = "ath",
                                         keyType = "ncbi-geneid",
                                         minGSSize = 10,           # minimal size of genes annotated by Ontology term for testing.
                                         maxGSSize = 500,          # maximal size of genes annotated for testing
                                         pAdjustMethod = "BH",
                                         qvalueCutoff = 0.01)
ggplot(ora_analysis_kegg)
#browseKEGG(ora_analysis_kegg, 'ath00195')
library("pathview")
#ath04145 <- pathview(gene.data  = allOE_genes, pathway.id = "ath00195", species    = "ath")

# create a simple dotplot graph
dotplot(ora_analysis_kegg, 
    color = "qvalue", 
    showCategory = 5, 
    size = "Count")


ridgeplot(ora_analysis_kegg, showCategory = 30) + labs(title = "KEGG_Pathway Analysis", x = "enrichment distribution") + scale_fill_gradient(low = "#56B1F7", high = "#132B43")

```

```{r}
mlist<-list(s_eGO4,s_eGO3)
names(mlist)<-c("upH-GOenrich","DwnH-GOenrich")
mresult<-merge_result(mlist)
dotplot(mresult,showCategory=10)
```


```{r}
library(gplots)
library(VennDiagram)
library(ggvenn)
 
# Create a list of sets for the Venn diagram
venn_list <- list(
  dwnIM = ego1$Description,
  upIM = eGO2$Description,
  hdwn = eGO3$Description,
  hUP = eGO4$Description
)
ggvenn(venn_list, show_elements = T, label_sep = "\n",text_size= 4,
  stroke_size = 0.5, set_name_size = 4)

list_df<- (list_to_data_frame(venn_list))

library(DT)
DT::datatable(list_df)


 
# Create a list of sets for the Venn diagram
#venn_list <- list(
dwnIM <- c(ego1$Description[1:10])
upIM <- c(eGO2$Description[1:10])
hdwn <- c(eGO3$Description[1:10])
hUP <- c(eGO4$Description[1:10])

venn_list<- list(dwnIM,upIM,hdwn,hUP)
view(venn_list)
ggvenn(venn_list, c("dwnIM", "upIM", "hdwn", "hUP"), show_elements = T, label_sep = "\n",text_size= 4,
  stroke_size = 0.5, set_name_size = 4)

list_df<- (list_to_data_frame(venn_list))

library(DT)
DT::datatable(list_df)
```


```{r}
#Go Enriched DOWN
mlist<-list(s_eGO1,s_eGO3)
names(mlist)<-c("IMdwn-GOenrich","DwnH-GOenrich")
mresult1<-merge_result(mlist)
dotplot(mresult1,showCategory=10)

mlist<-list(s_eGO4,s_eGO2)
names(mlist)<-c("upH-GOenrich","IMup-GOenrich")
mresult2<-merge_result(mlist)
dotplot(mresult2,showCategory=10)
```


```{r ENRICHED GO BAR GRAPHs}
library(patchwork)
p1<-barplot(eGO1, plot_type="bar")
p2<-barplot(eGO2, plot_type="bar", term_metric="GeneRatio", stats_metric="pvalue")
p3<-barplot(eGO3, plot_type="bar", up_color="#E69056", down_color ="#325CAC")
p4<-barplot(eGO4, plot_type="bar", wrap_length=25)
p1+p2+p3+p4+plot_annotation(tag_levels = "A")
```

```{r ENRICHED GO BAR GRAPH}
library(patchwork)
p1<-barplot(s_eGO1, plot_type="bar")
p2<-barplot(s_eGO2, plot_type="bar", term_metric="GeneRatio", stats_metric="pvalue")
p3<-barplot(s_eGO3, plot_type="bar", up_color="#E69056", down_color ="#325CAC")
p4<-barplot(s_eGO4, plot_type="bar", wrap_length=25)
p1+p2+p3+p4+plot_annotation(tag_levels = "A")
```


```{r ENRICHED GO DOT PLOT}

library(patchwork)
p1<-dotplot(eGO1, showCategory = 5, size = "Count")
p2<-dotplot(eGO2, showCategory = 5, size = "Count")
p3<-dotplot(eGO3, showCategory = 5, size = "Count")
p4<-dotplot(eGO4, showCategory = 5, size = "Count")
p1+p2+p3+p4+plot_annotation(tag_levels="A")
```

```{r}
library(patchwork)
p1<-goplot(eGO1, showCategory = 2, size = "Count")
p2<-goplot(eGO2, showCategory = 2, size = "Count")
p3<-goplot(eGO3, showCategory = 2, size = "Count")
p4<-goplot(eGO4, showCategory = 2, size = "Count")
p1+p2+p3+p4+plot_annotation(tag_levels="A")
```



```{r}
library(DOSE)
#enrich go plot
#pairwise termsim gets the similarity matrix, method of calculating the similarity between nodes, one of "Resnik", "Lin", "Rel", "Jiang" , "Wang" and "JC"(Jaccard similarity coefficient) methods
pwt <- pairwise_termsim(ALL_DEG_table,
                        method = "JC",
                        semData = NULL,
                        showCategory = 50)
emapplot(pwt, showCategory = 10, color = "qvalue")
p1 <- treeplot(pwt,) #The default agglomeration method in treeplot() is ward.D
p2 <- treeplot(pwt, hclust_method = "average")
aplot::plot_list(p1, p2, tag_levels='A')

#Enrichment map organises enriched terms into a network with edges weighted by the ratio of overlapping gene sets. Mutually overlapping gene sets are tending to cluster together, making it easy to identify functional modules.

p2 <- emapplot(pwt, showCategory = 10, cex_category=1.5)
p3 <- emapplot(pwt, showCategory = 10,layout="kk")
p4 <- emapplot(pwt, showCategory = 10, cex_category=1.5, layout="kk") 
cowplot::plot_grid( p2, p3, p4, ncol=2, labels=LETTERS[1:4])
```

```{r n progress not working}
library(DOSE)
## To color genes by log2 fold changes, we need to extract the log2 fold changes 
OE_foldchanges <- ALL_DEG_table$log2FoldChange
names(OE_foldchanges) <- ALL_DEG_table$gene

## Cnetplot details the genes associated with one or more terms
cnetplot(ALL_DEG_table,
         categorySize="pvalue",
         showCategory = 5,
         foldChange=OE_foldchanges,
         vertex.label.font=6)


OE_foldchanges <- ifelse(OE_foldchanges > 1, 1, OE_foldchanges)
OE_foldchanges <- ifelse(OE_foldchanges < -1, -1, OE_foldchanges)
cnetplot(ALL_DEG_table,
         categorySize="pvalue",
         showCategory = 5,
         foldChange=OE_foldchanges,
         vertex.label.font=6)
heatplot(ALL_DEG_table, showCategory = 5) #relationship between selected genes and corresponding biological concepts.
```


```{r in prog not working}
library(KEGGREST)
pathways <- keggList("pathway")
head(pathways)

library(pathview)
map <- gsub("ath04814", "", names(pathways)[2])  # remove 'path:'
pv.out <- pathview(gene.data = ALL_DEG_table, cpd.data = ALL_DEG_table, gene.idtype = "KEGG", 
    pathway.id = map, species = "ath", out.suffix = map, keys.align = "y", 
    kegg.native = T, match.data = T, key.pos = "topright")
plot.name <- paste(map, map, "png", sep = ".")
```

